Read in the splicing results returned by MAJIQ and make a volcano plot, only highlight genes of interest with a label.
ipsc_splicing = fread(file.path(here::here(),"data","ipsc_splicing_results.csv"))
ipsc_de = fread(file.path(here::here(),"data","ipsc_differential_expression.csv"))
splicing_dots_tables <- ipsc_splicing %>%
mutate(junction_name = case_when(gene_name %in% c("UNC13A","AGRN",
"UNC13B","PFKP","SETD5",
"ATG4B","STMN2") &
p_d_psi_0_10_per_lsv_junction > 0.9 &
deltaPSI > 0 ~ gene_name,
T ~ "")) %>%
mutate(`Novel Junction` = de_novo_junctions == 0) %>%
mutate(log10_test_stat = -log10(1 - p_d_psi_0_10_per_lsv_junction)) %>%
mutate(log10_test_stat = ifelse(is.infinite(log10_test_stat), 16, log10_test_stat)) %>%
mutate(graph_alpha = ifelse(p_d_psi_0_10_per_lsv_junction > 0.9, 1, 0.2)) %>%
mutate(label_junction = case_when(gene_name %in% c("UNC13A","AGRN",
"UNC13B","PFKP","SETD5",
"ATG4B","STMN2") &
p_d_psi_0_10_per_lsv_junction > 0.9 &
deltaPSI > 0 ~ junction_name,
T ~ ""))
fig1a = ggplot() +
geom_point(data = splicing_dots_tables %>% filter(de_novo_junctions != 0),
aes(x = deltaPSI, y =log10_test_stat,
alpha = graph_alpha,,fill = "Annotated Junction"), pch = 21, size = 10) +
geom_point(data = splicing_dots_tables %>% filter(de_novo_junctions == 0),
aes(x = deltaPSI, y =log10_test_stat,
alpha = graph_alpha,fill = "Novel Junction"), pch = 21, size = 10) +
geom_text_repel(data = splicing_dots_tables[junction_name != ""],
aes(x = deltaPSI, y =log10_test_stat,
label = label_junction,
color = as.character(de_novo_junctions)), point.padding = 0.3,
nudge_y = 0.2,
min.segment.length = 0,
box.padding = 2,
size=6,show.legend = F) +
geom_hline(yintercept = -log10(1 - .9)) +
geom_vline(xintercept = -0,linetype="dotted") +
scale_fill_manual(name = "",
breaks = c("Annotated Junction", "Novel Junction"),
values = c("Annotated Junction" = "#648FFF", "Novel Junction" = "#fe6101") ) +
scale_color_manual(name = "",
breaks = c("0", "1"),
values = c("1" = "#648FFF", "0" = "#fe6101") ) +
guides(alpha = FALSE, size = FALSE) +
theme(legend.position = 'top') +
ggpubr::theme_pubr() +
xlab("delta PSI") +
ylab(expression(paste("-Lo", g[10], " Test Statistic"))) +
theme(text = element_text(size = 24)) +
theme(legend.text=element_text(size=22)) +
xlim(-1,1) +
scale_x_continuous(labels = scales::percent)
Scale for 'x' is already present. Adding another scale for 'x', which will
replace the existing scale.
de_table = ipsc_de %>%
mutate(contains_cryptic = gene_name %in% splicing_dots_tables[cryptic_junction == T,unique(gene_name)]) %>%
mutate(contains_cryptic = as.character(as.numeric(contains_cryptic))) %>%
mutate(label_junction = case_when(gene_name %in% c("UNC13A","AGRN",
"UNC13B","PFKP","SETD5",
"ATG4B","STMN2","TARDBP") ~ gene_name,
T ~ "")) %>%
mutate(graph_alpha = ifelse(padj < 0.1, 1, 0.2)) %>%
mutate(y_data = -log10(padj))
fig1b = ggplot(data = de_table) +
geom_point(data = de_table %>% filter(contains_cryptic == "0"),
aes(x = log2FoldChange, y = -log10(padj),
alpha = graph_alpha,fill = "No Cryptic"), pch = 21, size = 10) +
geom_point(data = de_table %>% filter(contains_cryptic == "1"),
aes(x = log2FoldChange, y = -log10(padj),
alpha = graph_alpha,fill = "Contains Cryptic"), pch = 21, size = 10)+
geom_text_repel(data = de_table[label_junction != ""],max.overlaps = 500,
aes(x = log2FoldChange,
y = -log10(padj),
label = label_junction,
color = as.character(contains_cryptic)),
nudge_y = 5,
min.segment.length = 0,
box.padding = 4,
size=6,show.legend = F) +
scale_fill_manual(name = "",
breaks = c("No Cryptic", "Contains Cryptic"),
values = c("No Cryptic" = "#648FFF", "Contains Cryptic" = "#fe6101") ) +
scale_color_manual(name = "",
breaks = c("1", "0"),
values = c("1" = "#fe6101", "0" = "#648FFF") ) +
xlim(-7.5,7.5) +
ylab(expression(paste("-Lo", g[10], " P-value"))) +
guides(alpha = FALSE, size = FALSE) +
theme(legend.position = 'top') +
ggpubr::theme_pubr() +
xlab(expression(paste("Lo", g[2], " Fold Change"))) +
theme(text = element_text(size = 24)) +
theme(legend.text=element_text(size=22)) +
geom_hline(yintercept = -log10(0.1)) +
geom_vline(xintercept=c(0), linetype="dotted")
print(fig1a)
print(fig1b)
The ‘C/G’ tells which genotypes were supported by RNA-seq on rs12973192. The SK-N-DZ cell lines are het, as is the WTC11 cell line. SH-SY5Y cells are homozygote for the major allele. There was variability on the Klim hMN set on allelic expression.
rel_rna_cryptic_amount = data.table::fread(file.path(here::here(),"data","kd_experiments_relative_rna_and_unc13a_cryptic_junction_counts.csv"))
rel_rna_cryptic_amount[,cryptic_psi_full := ( UNC13A_3prime +
UNC13A_5prime + UNC13A_5prime_2 +
UNC13A_5prime_3) / (UNC13A_annotated + UNC13A_3prime +
UNC13A_5prime + UNC13A_5prime_2 +
UNC13A_5prime_3)]
rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "cryptic_psi_full",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(name = "",
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(name = "",
values = c("#1C2617","#262114")
) +
ylab("UNC13A CE PSI") +
xlab("") +
guides(color = FALSE) +
scale_y_continuous(labels = scales::percent) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28))
rel_rna_cryptic_amount %>%
ggbarplot(,
x = "condition",
add = c("mean_se","jitter"),
y = "unc13b_nmd_exon_psi",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8),facet.by = 'source') +
ggpubr::theme_pubr() +
scale_fill_manual(name = "",
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(name = "",
values = c("#1C2617","#262114")
) +
ylab("UNC13B \nNMD Exon PSI") +
xlab("") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28)) +
facet_wrap(~source) +
stat_compare_means() +
scale_y_continuous(labels = scales::percent)
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = cryptic_psi_full, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = cryptic_psi_full),size = 12,method = 's',cor.coef.name = 'rho') +
geom_smooth(aes(x = TARDBP, y = cryptic_psi_full),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
ylab("UNC13A CE PSI") +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom') +
scale_y_continuous(labels = scales::percent)
`geom_smooth()` using formula 'y ~ x'
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = UNC13A, y = cryptic_psi_full, fill = source),pch = 21,size = 6) +
stat_cor(aes(x = UNC13A, y = cryptic_psi_full),size = 12,
method = 'spearman',
cor.coef.name = 'rho') +
geom_smooth(aes(x = UNC13A, y = cryptic_psi_full),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
scale_y_continuous(labels = scales::percent) +
ylab("UNC13A CE PSI") +
expand_limits(y = 1) +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom') +
theme(text = element_text(size = 18,family = 'sans'),
legend.text = element_text(size = 20,family = 'sans'),
axis.title = element_text(size = 32),
axis.text = element_text(size = 32))
`geom_smooth()` using formula 'y ~ x'
## UNC13A RNA and UNC13A IR
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = UNC13A, y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = UNC13A, y = normalized_unc13a_ir),size = 12,cor.coef.name = 'rho',method = 's') +
geom_smooth(aes(x = UNC13A, y = normalized_unc13a_ir),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
ylab("UNC13A Normalized IR") +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom')
`geom_smooth()` using formula 'y ~ x'
## TARDBP RNA and UNC13A IR
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = normalized_unc13a_ir),size = 12) +
geom_smooth(aes(x = TARDBP, y = normalized_unc13a_ir),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
ylab("UNC13A Normalized IR") +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom')
`geom_smooth()` using formula 'y ~ x'
rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "cryptic_psi_full",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("UNC13A CE PSI") +
xlab("") +
guides(color = FALSE)
stmn2 = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "stmn_2_cryptic_psi",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("STMN2 Cryptic PSI") +
xlab("") +
guides(color = FALSE)
print(stmn2)
unc13b_ir = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "normalized_unc13b_ir",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("Normalized UNC13B \nIntron Retention Ratio") +
xlab("") +
guides(color = FALSE) +
theme(legend.position = "none") +
theme(text = element_text(size = 18)) +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28))
print(unc13b_ir)
unc13a_ir = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "normalized_unc13a_ir",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("Normalized UNC13A \nIntron Retention Ratio") +
xlab("") +
theme(legend.position = "none") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28))
text_table = fread(file.path(here::here(),"data","deseq2_cellline_fold_change.csv"))
rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "UNC13A",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("UNC13A") +
xlab("") +
theme(legend.position = "none") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28)) +
scale_y_continuous(labels = scales::percent) +
geom_text(data = text_table[gene_name == "UNC13A"],aes(x = source, y= 1.35,label = padj_plot),size = 8) +
geom_text(data = text_table[gene_name == "UNC13A"],aes(x = source, y= 1.2,label = log2FoldChange_plot),size = 8)
text_table = fread(file.path(here::here(),"data","deseq2_cellline_fold_change.csv"))
rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "UNC13A",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("UNC13B") +
xlab("") +
theme(legend.position = "none") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28)) +
scale_y_continuous(labels = scales::percent) +
geom_text(data = text_table[gene_name == "UNC13B"],aes(x = source, y= 1.35,label = padj_plot),size = 8) +
geom_text(data = text_table[gene_name == "UNC13B"],aes(x = source, y= 1.2,label = log2FoldChange_plot),size = 8)
text_table = fread(file.path(here::here(),"data","deseq2_cellline_fold_change.csv"))
rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "TARDBP",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("TARDBP") +
xlab("") +
theme(legend.position = "none") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28)) +
scale_y_continuous(labels = scales::percent,expand = expansion(mult = c(0, .1))) +
geom_text(data = text_table[gene_name == "TARDBP"],aes(x = source, y= 1.35,label = padj_plot),size = 8) +
geom_text(data = text_table[gene_name == "TARDBP"],aes(x = source, y= 1.2,label = log2FoldChange_plot),size = 8)
## Scatterplot stmn2 normalized TARDBP
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = stmn_2_cryptic_psi, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = stmn_2_cryptic_psi)) +
theme(legend.position = 'bottom')
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = normalized_unc13b_ir)) +
theme(legend.position = 'bottom')
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = cryptic_psi_full,
y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = cryptic_psi_full,
y = normalized_unc13a_ir)) +
theme(legend.position = 'bottom')
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = cryptic_psi_full, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = cryptic_psi_full, y = normalized_unc13b_ir)) +
theme(legend.position = 'bottom')+
facet_wrap(~source)
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = unc13b_nmd_exon_psi, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = unc13b_nmd_exon_psi, y = normalized_unc13b_ir)) +
theme(legend.position = 'bottom')+
facet_wrap(~source)
fbp_sy <- read_csv(file.path(here::here(),"data","results_17022021_sy5y.csv"))
── Column specification ────────────────────────────────────────────────────────
cols(
value = col_double(),
condition = col_character(),
gene = col_character()
)
fbp_sy$condition <- as.factor(fbp_sy$condition)
fbp_sy$condition <- factor(fbp_sy$condition,levels(fbp_sy$condition)[c(2,1)])
stattest_sy <- fbp_sy %>%
group_by(gene) %>%
t_test(value ~ 1, mu = 1) %>%
add_significance() %>%
add_xy_position(x="gene")
stattest_sy
# A tibble: 4 x 14
gene .y. group1 group2 n statistic df p p.signif y.position
<chr> <chr> <chr> <chr> <int> <dbl> <dbl> <dbl> <chr> <dbl>
1 hnRNPL value 1 null mod… 10 2.64 9 0.0271 * 5.12
2 STMN2 value 1 null mod… 10 1.63 9 0.138 ns 2.15
3 UNC13A value 1 null mod… 10 2.94 9 0.0164 * 9.69
4 UNC13B value 1 null mod… 10 2.79 9 0.0212 * 7.15
# … with 4 more variables: groups <list>, x <dbl>, xmin <dbl>, xmax <dbl>
nmd_plot_sy <- ggbarplot(fbp_sy,
x = 'gene',
y = 'value',
add = c("mean_se","jitter"),
fill = 'condition',
color = 'condition',
position = position_dodge(0.8),
dot.size = 10) +
scale_y_continuous(labels = scales::percent) +
ggpubr::theme_pubr() +
scale_fill_manual(values = c("#40B0A6","#E1BE6A")) +
scale_color_manual(values = c("#1C2617","#262114")) +
ylab("Percent of transcript after CHX treatment") +
xlab("") +
stat_pvalue_manual(stattest_sy, label="p.signif", y.position = 9)
nmd_plot_sy
fbp_dz <- read_csv(file.path(here::here(),"data","results_17022021_dz.csv"))
── Column specification ────────────────────────────────────────────────────────
cols(
value = col_double(),
condition = col_character(),
gene = col_character()
)
fbp_dz$condition <- as.factor(fbp_dz$condition)
fbp_dz$condition <- factor(fbp_dz$condition,levels(fbp_dz$condition)[c(2,1)])
stattest_dz <- fbp_dz %>%
group_by(gene) %>%
t_test(value ~ 1, mu = 1) %>%
add_significance() %>%
add_xy_position(x="gene")
stattest_dz
# A tibble: 4 x 14
gene .y. group1 group2 n statistic df p p.signif y.position
<chr> <chr> <chr> <chr> <int> <dbl> <dbl> <dbl> <chr> <dbl>
1 hNRNPL value 1 null mod… 10 2.28 9 0.0482 * 8.40
2 STMN2 value 1 null mod… 10 1.59 9 0.146 ns 4.39
3 UNC13A value 1 null mod… 10 2.82 9 0.02 * 17.5
4 UNC13B value 1 null mod… 10 2.72 9 0.0235 * 28.1
# … with 4 more variables: groups <list>, x <dbl>, xmin <dbl>, xmax <dbl>
nmd_plot_dz <- ggbarplot(fbp_dz,
x = 'gene',
y = 'value',
add = c("mean_se","jitter"),
fill = 'condition',
color = 'condition',
position = position_dodge(0.8),
dot.size = 10) +
scale_y_continuous(labels = scales::percent) +
ggpubr::theme_pubr() +
scale_fill_manual(values = c("#40B0A6","#E1BE6A")) +
scale_color_manual(values = c("#1C2617","#262114")) +
ylab("Percent of transcript after CHX treatment") +
xlab("") +
stat_pvalue_manual(stattest_dz, label="p.signif", y.position = 26)
nmd_plot_dz
peptides = fread(file.path(here::here(),"data","peptide_abdundance.csv"))
peptides %>%
mutate(gene = fct_relevel(gene, "UNC13A","UNC13B")) %>%
ggbarplot(,
x = "condition",
add = c("mean_se"),
y = "protein",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8),
facet.by = 'gene') +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("Protein abundance") +
xlab("") +
geom_jitter(pch = 21,height = 0,aes(fill = condition),size = 2) +
guides(color = FALSE) +
facet_wrap(~gene, scales = 'free') +
theme(legend.position = 'none') +
scale_y_continuous(labels = scientific) +
stat_compare_means(comparisons = list(c("Control","TDP-43 KD")),
label = 'p.signif',tip.length = 0,
size = 10)
clean_data_table = fread(file.path(here::here(),"data","nygc_junction_information.csv"))
clean_data_table = clean_data_table %>%
mutate(rs12973192 = fct_relevel(rs12973192,
"C/C", "C/G", "G/G")) %>%
mutate(number_g_alleles = as.numeric(rs12973192) - 1) %>%
mutate(unc13a_cryptic_leaf_psi = ifelse(is.na(unc13a_cryptic_leaf_psi),0,unc13a_cryptic_leaf_psi)) %>%
mutate(junction_reads_stmn2 = STMN2_annotated_leaf + STMN2_cryptic_leaf) %>%
mutate(junction_reads_unc13a = UNC13A_3prime_leaf + UNC13A_5prime_1_leaf + UNC13A_annotated_leaf) %>%
unique()
print(glue::glue("Number of unique patients: {clean_data_table[,length(unique(participant_id))]}"))
Number of unique patients: 377
print(glue::glue("Number of unique tissue samples: {clean_data_table[,length(unique(sample))]}"))
Number of unique tissue samples: 1349
print("Patients Per Disease Category")
[1] "Patients Per Disease Category"
clean_data_table[,length(unique(participant_id)),by = disease]
disease V1
1: ALS-FTD 23
2: ALS 193
3: Control 77
4: Other 11
5: FTD 61
6: ALS-AD 12
print("Tissues Per Disease Category")
[1] "Tissues Per Disease Category"
clean_data_table[,length(unique(sample)),by = disease]
disease V1
1: ALS-FTD 110
2: ALS 764
3: Control 199
4: Other 70
5: FTD 138
6: ALS-AD 68
print("Number of patients per UNC13A SNP genotype")
[1] "Number of patients per UNC13A SNP genotype"
unique(clean_data_table[,.(participant_id,rs12608932,rs12973192)]) %>% select(-participant_id) %>% table()
rs12973192
rs12608932 C/C C/G G/G
A/A 166 0 0
A/C 0 152 1
C/C 0 1 57
print("Number of tissues per disease")
[1] "Number of tissues per disease"
clean_data_table[,.N,by = c("disease","tissue_clean")]
disease tissue_clean N
1: ALS-FTD Frontal_Cortex 22
2: ALS Frontal_Cortex 132
3: Control Frontal_Cortex 40
4: Other Frontal_Cortex 11
5: ALS-FTD Lumbar_Spinal_Cord 15
6: ALS Lumbar_Spinal_Cord 105
7: Control Lumbar_Spinal_Cord 33
8: Other Lumbar_Spinal_Cord 9
9: ALS-FTD Cervical_Spinal_Cord 14
10: ALS Cervical_Spinal_Cord 103
11: Control Cervical_Spinal_Cord 32
12: Other Cervical_Spinal_Cord 10
13: ALS-FTD Motor_Cortex 28
14: ALS Motor_Cortex 175
15: Control Motor_Cortex 23
16: Other Motor_Cortex 16
17: ALS-FTD Cerebellum 13
18: ALS Cerebellum 129
19: Control Cerebellum 28
20: Other Cerebellum 8
21: FTD Cerebellum 58
22: FTD Frontal_Cortex 45
23: ALS-AD Cerebellum 11
24: ALS-AD Motor_Cortex 13
25: ALS-AD Cervical_Spinal_Cord 10
26: ALS-AD Lumbar_Spinal_Cord 11
27: ALS-AD Frontal_Cortex 12
28: ALS-AD Occipital_Cortex 7
29: ALS-AD Thoracic_Spinal_Cord 4
30: ALS Occipital_Cortex 37
31: ALS Thoracic_Spinal_Cord 33
32: ALS-FTD Occipital_Cortex 6
33: Control Temporal_Cortex 23
34: ALS Temporal_Cortex 23
35: FTD Temporal_Cortex 35
36: Other Occipital_Cortex 7
37: Other Thoracic_Spinal_Cord 6
38: Control Thoracic_Spinal_Cord 5
39: Control Occipital_Cortex 5
40: ALS-FTD Thoracic_Spinal_Cord 5
41: ALS Hippocampus 27
42: ALS-FTD Hippocampus 7
43: Other Hippocampus 3
44: Control Hippocampus 10
disease tissue_clean N
print("Number of partcipants by mutation and disease")
[1] "Number of partcipants by mutation and disease"
clean_data_table[,length(unique(participant_id)),by = c("mutations","disease")]
mutations disease V1
1: None ALS-FTD 13
2: None ALS 145
3: C9orf72 ALS-FTD 10
4: None Control 77
5: None Other 11
6: SOD1 ALS 8
7: OPTN ALS 4
8: C9orf72 ALS 32
9: MATR3 ALS 1
10: ANG ALS 1
11: C9orf72 FTD 12
12: None ALS-AD 11
13: None FTD 42
14: C9orf72 ALS-AD 1
15: TBK1 FTD 2
16: MAPT FTD 5
17: FUS ALS 2
print(glue::glue("Number of patients per pathology:"))
Number of patients per pathology:
clean_data_table[,length(unique(participant_id)),by = .(pathology)]
pathology V1
1: ALS-FTD 23
2: ALS 193
3: control 77
4: Other 11
5: 13
6: ALS-AD 12
7: FTD-TDP-A 24
8: FTD-TDP-B 3
9: FTD-TDP-C 9
10: FTD-TAU 7
11: FTD-FUS 5
FTLD-non-TDP are those with TAU and FUS aggregates
Non-tdp ALS are those with SOD1 or FUS mutations. The n’s are quite low on this unfortunately, only 8 ALS with SOD1 and 2 with FUS mutations.
First we look at detection rate in tissues affected by TDP-43 proteinopathy, For FTLD this is frontal and temporal Cortices, and for ALS this is lumbar, cervical, and thoracic spinal cord samples as well as motor cortex. For controls we also take all 6 tissues, frontal,temporal,motor cortices and the lumbar, cervical, and thoracic spinal cords.
(As a side note, once we do this the number of ALS-non-TDP drops down to 6 (2 FUS) because the ALS sample tissues are not balanced and not every participant has samples in every tissue)
####Inclusion reads by if TDP-potential####
boxplot_table = clean_data_table %>%
mutate(across(UNC13A_3prime_leaf:UNC13A_annotated_leaf, ~ .x / library_size,.names = "{.col}_library_norm")) %>%
filter(!tissue_clean %in% c("Choroid","Liver")) %>%
dplyr::select(sample,participant_id,mutations,disease_group2,pathology,tissue_clean,contains("_library_norm")) %>%
melt() %>%
filter(grepl("_3prime|_5prime_1",variable)) %>%
group_by(sample) %>%
mutate(inclusion_reads = sum(value)) %>%
ungroup() %>%
unique() %>%
mutate(junction_name = case_when(variable == "UNC13A_3prime_leaf_library_norm" ~ " Novel Donor",
variable == "UNC13A_5prime_1_leaf_library_norm" ~ " Short Novel Acceptor",
variable == "UNC13A_5prime_2_leaf_library_norm"~ "Long Novel Acceptor")) %>%
mutate(disease_tissue = case_when((grepl("FTLD",disease_group2) & grepl("Cortex",tissue_clean)) ~ T,
(grepl("ALS",disease_group2) & grepl("Cord|Motor",tissue_clean)) ~ T,
(grepl("Occipital",tissue_clean)) ~ F,
(grepl("Control",disease_group2) & grepl("Cord|Cortex",tissue_clean)) ~ T,
TRUE ~ F)) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean))
melt_count = clean_data_table[,.(sample,UNC13A_3prime_leaf,UNC13A_5prime_1_leaf,UNC13A_5prime_2_leaf)] %>% data.table::melt() %>% setnames(.,"value","orig_count")
Looking at disease tissue only, so just taking the cord and motor cortex in ALS and the frontal and temporal cortex of FTLD and then the cord and cortices in Controls.
boxplot_table %>%
filter(disease_tissue == T) %>%
mutate(detected = inclusion_reads > 0) %>%
dplyr::select(participant_id,disease_group2,detected) %>%
unique() %>%
group_by(disease_group2) %>%
mutate(n_sample = n_distinct(participant_id)) %>%
mutate(n_sample_detected = sum(detected)) %>%
dplyr::select(disease_group2,n_sample,n_sample_detected) %>%
unique() %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
ggplot() +
geom_col(aes(x = detection_name, y = detection_rate)) +
ggpubr::theme_pubr() +
scale_y_continuous(lim = c(0,1),labels = scales::percent) +
ylab("Percent of Patients \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 24)) +
xlab("N individuals")
boxplot_table %>%
filter(!(tissue_clean %in% c("Cerebellum","Hippocampus","Occipital Cortex"))) %>%
mutate(detected = inclusion_reads > 0) %>%
dplyr::select(sample,disease_group2,detected,tissue_clean) %>%
unique() %>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
mutate(n_sample_detected = sum(detected)) %>%
dplyr::select(tissue_clean,disease_group2,n_sample,n_sample_detected) %>%
unique() %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
ungroup() %>%
filter(grepl("ALS-TDP|FTLD-T",disease_group2)) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Motor Cortex",
"Thoracic Spinal Cord")) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(cortex = ifelse(grepl(" Cord",tissue_clean),"cot","cor")) %>%
ggplot() +
geom_col(aes(x = detection_name, y = detection_rate,fill = detection_name),position = position_dodge2(width = 0.8, preserve = "single")) +
ggpubr::theme_pubr() +
ylab("Percent of Tissues \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 18)) +
xlab("N tissues") +
facet_wrap(~tissue_clean,scales = 'free_x',nrow = 3) +
scale_y_continuous(lim = c(0,1),labels = scales::percent,expand = c(0,0) ) +
theme(axis.text.x = element_text(size = 14)) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 18)) +
scale_fill_manual(values = c("#4C97B5","#4C97B5","#4C97B5","#4C97B5","#4C97B5","red","#C87156","#C87156")) +
theme(legend.position = "none")
boxplot_table %>%
left_join(clean_data_table %>% dplyr::select(sample, rs12973192)) %>%
unique() %>%
filter(!(tissue_clean %in% c("Cerebellum","Hippocampus","Occipital Cortex"))) %>%
mutate(detected = inclusion_reads > 0) %>%
dplyr::select(sample,disease_group2,detected,tissue_clean,rs12973192) %>%
unique() %>%
group_by(disease_group2,tissue_clean,rs12973192) %>%
mutate(n_sample = n_distinct(sample)) %>%
mutate(n_sample_detected = sum(detected)) %>%
dplyr::select(tissue_clean,disease_group2,n_sample,n_sample_detected) %>%
unique() %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
ungroup() %>%
filter(grepl("ALS-TDP|FTLD-T",disease_group2)) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Motor Cortex",
"Thoracic Spinal Cord")) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(cortex = ifelse(grepl(" Cord",tissue_clean),"cot","cor")) %>%
ggplot() +
geom_col(aes(x = detection_name, y = detection_rate,fill = rs12973192),position = position_dodge2(width = 0.8, preserve = "single")) +
ggpubr::theme_pubr() +
ylab("Percent of Tissues \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 18)) +
xlab("N tissues") +
facet_wrap(~tissue_clean,scales = 'free_x',nrow = 3) +
scale_y_continuous(lim = c(0,1),labels = scales::percent,expand = c(0,0) ) +
theme(axis.text.x = element_text(size = 14)) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 18)) +
theme(legend.position = "bottom")
Joining, by = "sample"
Adding missing grouping variables: `rs12973192`
This only plots the short novel acceptor and novel donor
boxplot_table %>%
filter(tissue_clean %in%
c("Frontal Cortex",
"Lumbar Spinal Cord",
"Cervical Spinal Cord",
"Motor Cortex",
"Temporal Cortex",
"Cerebellum") )%>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Temporal Cortex",
"Motor Cortex")) %>%
ggplot(aes(x = detection_name,
y = inclusion_reads * 10^6,
fill ="red")) +
geom_boxplot(show.legend = F,position = position_dodge(preserve = "single",width = 1),outlier.colour = NA) +
geom_point(size = 2,show.legend = F,pch = 21, position = position_jitterdodge(dodge.width=1,jitter.width = 0.3))+
ggplot2::facet_wrap(vars(tissue_clean),scales = "free_x",nrow = 3) +
ylab("UNC13A cryptic \n reads per million") +
theme(text = element_text(size = 12)) +
xlab("") +
scale_fill_manual(values = colorblind_pal()(4)[2:4]) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 18)) +
theme(legend.position = "bottom")
clean_data_table %>%
mutate(across(UNC13A_3prime_leaf:UNC13A_annotated_leaf, ~ .x / library_size,.names = "{.col}_library_norm")) %>%
filter(!tissue_clean %in% c("Choroid","Liver")) %>%
dplyr::select(sample,participant_id,mutations,disease_group2,pathology,tissue_clean,contains("_library_norm")) %>%
melt() %>%
filter(grepl("_3prime|_5prime_1|_5prime_2",variable)) %>%
# filter(grepl("_5prime_2",variable)) %>%
unique() %>%
mutate(junction_name = case_when(variable == "UNC13A_3prime_leaf_library_norm" ~ " Novel Donor",
variable == "UNC13A_5prime_1_leaf_library_norm" ~ " Short Novel Acceptor",
variable == "UNC13A_5prime_2_leaf_library_norm"~ "Long Novel Acceptor")) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
dplyr::select(sample,disease_group2,
tissue_clean,
junction_name,
value) %>%
unique() %>%
filter(tissue_clean %in%
c("Frontal Cortex",
"Lumbar Spinal Cord",
"Cervical Spinal Cord",
"Motor Cortex",
"Temporal Cortex",
"Cerebellum"))%>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Temporal Cortex",
"Motor Cortex")) %>%
ggplot(aes(x = detection_name,
y = value * 10^6,
fill = junction_name)) +
geom_boxplot(show.legend = F,position = position_dodge(preserve = "single")) +
geom_point(size = 2,pch = 21, alpha = 0.7, position = position_jitterdodge(jitter.width = 0.2))+
scale_y_log10() +
ggplot2::facet_wrap(vars(tissue_clean),scales = "free_x",nrow = 3) +
ylab("UNC13A cryptic \n reads per million") +
theme(text = element_text(size = 12)) +
xlab("") +
scale_fill_manual(values = colorblind_pal()(4)[2:4]) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 18)) +
theme(legend.position = "bottom")
disease_comparisons = list( c("Control","ALS-TDP"),
c("Control","ALS \n non-TDP"),
c("Control","FTLD-TDP"),
c("Control","FTLD \n non-TDP" ))
table_to_test = boxplot_table %>%
filter(disease_tissue == T) %>%
group_by(disease_group2) %>%
mutate(n_sample = n_distinct(sample)) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
dplyr::select(detection_name,
participant_id,
inclusion_reads,
disease_group2,
tissue_clean,
disease_tissue) %>%
unique()
test_pair = pairwise.wilcox.test(table_to_test$inclusion_reads, table_to_test$detection_name,
p.adjust.method = "BH") %>% broom::tidy()
test_pair = test_pair %>%
mutate(p_value_draw = case_when(p.value < 0.0001~ "***",
p.value < 0.01 ~ "**",
p.value < 0.05 ~ "*",
TRUE ~ paste0("Adj. p-value \n",as.character(round(p.value,2))))) %>%
mutate(y.position = seq(0.25,by = 0.1,length.out = 7))
table_to_test %>%
ggplot(aes(x = detection_name, y = inclusion_reads * 10^6)) +
geom_boxplot() +
geom_jitter(height = 0) +
scale_y_log10() +
ggpubr::theme_pubr() +
ylab("UNC13A cryptic inclusion \n reads per million") +
theme(text = element_text(size = 24)) +
xlab("N samples") +
stat_pvalue_manual(test_pair %>% filter(p.value < 0.05),
label = "p_value_draw",size = 8) +
stat_compare_means(size = 8)
comps = clean_data_table %>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>% pull(detection_name) %>%
unique() %>%
combn(.,2,simplify = F)
clean_data_table %>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
filter(tissue_clean %in%
c("Frontal Cortex",
"Lumbar Spinal Cord",
"Cervical Spinal Cord",
"Motor Cortex",
"Temporal Cortex",
"Cerebellum") )%>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Temporal Cortex",
"Motor Cortex")) %>%
ggplot(aes(x = detection_name, y = UNC13A_TPM)) +
ggpubr::theme_pubr() +
geom_boxplot(aes(fill = disease_group2)) +
geom_jitter(pch = 21, color = 'black',width = 0.2,height = 0,aes(fill = disease_group2)) +
ylab("UNC13A TPM") +
xlab("") +
guides(color = FALSE) +
theme(text = element_text(size = 20)) +
facet_wrap(~tissue_clean, scales = 'free',nrow = 3) +
theme(legend.position = 'none') +
stat_compare_means(comparisons = comps)
plot_by_tissue = function(tissue_name,dt){
only_tissue = dt %>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
filter(tissue_clean == tissue_name) %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean))
unique_x_values = only_tissue %>%
pull(detection_name) %>%
unique()
comparisons = combn(unique_x_values,2,simplify = F)
the_plot = only_tissue %>%
ggplot(aes(x = detection_name, y = UNC13A_TPM)) +
ggpubr::theme_pubr() +
geom_boxplot(aes(fill = disease_group2)) +
geom_jitter(pch = 21, color = 'black',width = 0.2,height = 0,aes(fill = disease_group2)) +
ylab("UNC13A TPM") +
xlab("") +
guides(color = FALSE) +
theme(text = element_text(size = 20)) +
facet_wrap(~tissue_clean, scales = 'free',nrow = 3) +
theme(legend.position = 'none') +
stat_compare_means() +
stat_compare_means(label = "p.signif",comparisons = comparisons,)
return(the_plot)
}
dis_tis = clean_data_table[disease_tissue == T,unique(tissue_clean)]
dis_tis = c("Cerebellum",dis_tis)
my_plots = purrr::map(dis_tis,plot_by_tissue, clean_data_table)
ggarrange(plotlist=my_plots)
only in ALS/FTLD-TDP
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = UNC13A_TPM)) +
geom_point() +
stat_cor(size = 8) +
geom_smooth(method = lm, se = FALSE,color = "black") +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A TPM") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
ylim(0,50)
`geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = UNC13A_TPM)) +
geom_point() +
stat_cor(size = 8) +
geom_smooth(method = lm, se = FALSE,color = "black") +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A TPM") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
facet_wrap(~tissue_clean,scales = "free_y")
`geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(UNC13A_annotated_leaf > 15) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(unc13a_cryptic_leaf_psi > 0) %>%
ggplot(aes(x = unc13a_cryptic_leaf_psi, y = UNC13B_TPM)) +
geom_point() +
stat_cor(label.y = 20,size = 8,method = 's',cor.coef.name = 'rho') +
geom_smooth(method = lm, se = FALSE,color = "black") +
ylab("UNC13A TPM") +
xlab("UNC13A CE PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 22)) +
scale_x_continuous(labels = scales::percent)
`geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(UNC13A_annotated_leaf > 15) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
ggplot(aes(x = unc13a_cryptic_leaf_psi, y = UNC13A_TPM, color = disease_group2)) +
geom_point() +
stat_cor(size = 8) +
geom_smooth(method = lm, se = FALSE,color = "black") +
ylab("UNC13A TPM") +
xlab("UNC13A CE PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
facet_wrap(~tissue_clean,scales = 'free') +
scale_x_continuous(labels = scales::percent)
`geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
# filter(UNC13A_3prime_leaf + UNC13A_5prime_1_leaf >= 2) %>%
# filter(STMN2_cryptic_leaf >= 2) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = unc13a_cryptic_leaf_psi)) +
geom_point() +
stat_cor(size = 8,method = "spearman",cor.coef.name = "rho") +
geom_smooth(method = lm, se = FALSE,color = "black") +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A CE PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
scale_x_continuous(labels = scales::percent_format(accuracy = 1L))
`geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
filter(tissue_clean %in%
c("Frontal Cortex",
"Lumbar Spinal Cord",
"Cervical Spinal Cord",
"Motor Cortex",
"Temporal Cortex",
"Cerebellum") )%>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(disease_tissue == T) %>%
filter(UNC13A_3prime_leaf + UNC13A_5prime_1_leaf >= 1) %>%
filter(STMN2_cryptic_leaf >= 1) %>%
dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
melt() %>%
mutate(tissue_clean = fct_relevel(tissue_clean,"Motor Cortex", "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Temporal Cortex")) %>%
mutate(variable = ifelse(grepl("stmn_2",variable), "STMN2", "UNC13A")) %>%
ggplot(aes(x = variable, y = value)) +
geom_boxplot(aes(color = variable),outlier.shape = NA) +
geom_jitter(aes(fill = variable),pch = 21,height = 0,width = 0.2,size = 3) +
scale_fill_manual(values = c("#008574","#cd5582")) +
scale_color_manual(values = c("#008574","#cd5582")) +
facet_wrap(~tissue_clean,nrow = 3, scales = 'free') +
scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
xlab("") +
ylab("CE PSI") +
theme(legend.position = 'none')
clean_data_table %>%
filter(UNC13A_annotated_leaf > 10) %>%
filter(STMN2_annotated_leaf > 10) %>%
# filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
# filter(unc13a_cryptic_leaf_psi > 0) %>%
# filter(disease_tissue == T) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
filter(tissue_clean %in% c("Motor Cortex", "Cervical Spinal Cord", "Frontal Cortex", "Lumbar Spinal Cord", "Temporal Cortex")) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(log2_rat = log2((unc13a_cryptic_leaf_psi + 0.1) / (stmn_2_cryptic_psi_leaf + 0.1))) %>%
group_by(tissue_clean, disease_group2) %>%
mutate(average_log2Fold = mean(log2_rat)) %>%
ungroup() %>%
dplyr::select(-log2_rat) %>%
pivot_longer(cols = c("stmn_2_cryptic_psi_leaf","unc13a_cryptic_leaf_psi")) %>%
mutate(tissue_clean = fct_reorder(tissue_clean,average_log2Fold)) %>%
mutate(variable = ifelse(grepl("stmn_2",name), "STMN2", "UNC13A")) %>%
ggplot(aes(x = tissue_clean, y = value,fill = name)) +
geom_boxplot() +
geom_point(pch = 21,height = 0,width = 0.2,position = position_jitterdodge()) +
scale_fill_manual(values = c("#004D40","#882255")) +
facet_wrap(~tissue_clean+disease_group2,nrow = 1,scales = 'free') +
theme(text = element_text(size = 24)) +
xlab("") +
ylab("PSI") +
theme(legend.position = 'none')
clean_data_table %>%
# filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
# filter(unc13a_cryptic_leaf_psi > 0) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
melt() %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
mutate(tissue_clean = fct_relevel(tissue_clean,"Motor Cortex", "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Temporal Cortex")) %>%
mutate(variable = ifelse(grepl("stmn_2",variable), "STMN2", "UNC13A")) %>%
ggplot(aes(x = variable, y = value)) +
geom_boxplot(aes(color = variable)) +
geom_jitter(aes(fill = variable),pch = 21,height = 0,width = 0.2,size = 3) +
scale_fill_manual(values = c("#004D40","#882255")) +
scale_color_manual(values = c("#004D40","#882255")) +
facet_wrap(~tissue_clean+disease_group2,nrow = 3,scales = 'free') +
scale_y_continuous(labels = scales::percent) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
xlab("") +
ylab("PSI") +
theme(legend.position = 'none')
clean_data_table %>%
filter(UNC13A_annotated_leaf > 10) %>%
filter(STMN2_annotated_leaf > 10) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(log2_unc = log2((unc13a_cryptic_leaf_psi)/(stmn_2_cryptic_psi_leaf)))%>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
# mutate(variable = ifelse(grepl("stmn_2",variable), "STMN2", "UNC13A")) %>%
ggplot(aes(x = tissue_clean,y = log2_unc, fill = tissue_clean)) +
geom_boxplot(position = 'dodge') +
# geom_jitter(height = 0,width = 0.2,pch = 21) +
facet_wrap(~disease_group2,scales = "free_x") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 24)) +
xlab("") +
ylab("PSI") +
theme(legend.position = 'top') +
theme(legend.title=element_blank()) +
stat_compare_means(comparisons = list(c("STMN2","UNC13A")))
rs12973192_comparisons = clean_data_table %>%
filter(junction_reads_unc13a >=30) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )"))
comp_for_gg = rs12973192_comparisons %>% pull(detection_name) %>%
unique() %>%
combn(.,2,simplify = F)
ce_psi = rs12973192_comparisons %>%
ggplot(aes(y = unc13a_cryptic_leaf_psi, x = detection_name, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
ylab("UNC13A PSI") +
xlab(element_blank()) +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means() +
theme(legend.position = "none") +
stat_compare_means(comparisons = comp_for_gg,label = "p.signif") +
scale_y_continuous(trans='log10',labels = scales::percent_format(accuracy = 1L)) +
ggtitle("CE SNP (rs12973192)")
rs12608932_comparisons = clean_data_table %>%
filter(junction_reads_unc13a >=30) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12608932,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
group_by(rs12608932) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12608932} \n ( {n_sample} )"))
comp_for_gg = rs12608932_comparisons %>% pull(detection_name) %>%
unique() %>%
combn(.,2,simplify = F)
in_psi = rs12608932_comparisons %>%
ggplot(aes(y = unc13a_cryptic_leaf_psi, x = detection_name, fill = rs12608932)) +
geom_boxplot() +
geom_jitter( height = 0) +
ylab(element_blank()) +
xlab(element_blank()) +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(legend.position = "none") +
theme(text = element_text(size = 24)) +
stat_compare_means() +
stat_compare_means(comparisons = comp_for_gg,label = "p.signif") +
scale_y_continuous(trans='log10',labels = scales::percent_format(accuracy = 1L)) +
ggtitle("intronic SNP (rs12973192)")
ggpubr::ggarrange(ce_psi,in_psi)
genotype_comparisons = list(c("C/C", "C/G"), c("C/C", "G/G"), c("C/G", "G/G"))
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
ggplot(aes(y = unc13a_cryptic_leaf_psi, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2,scales = 'free') +
ylab("UNC13A PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
genotype_comparisons = list(c("C/C", "C/G"), c("C/C", "G/G"), c("C/G", "G/G"))
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
ggplot(aes(y = unc13a_cryptic_leaf_psi, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
ylab("UNC13A PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
clean_data_table %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
ggplot(aes(y = unc13a_cryptic_leaf_psi, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
ylab("UNC13A PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
ggplot(aes(y = stmn_2_cryptic_psi_leaf, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2,scales = 'free') +
ylab("STMN2 PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
ggplot(aes(y = stmn_2_cryptic_psi_leaf, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
ylab("STMN2 PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
clean_data_table %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
ggplot(aes(y = stmn_2_cryptic_psi_leaf, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
ylab("STMN2 PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
ce_snp = clean_data_table %>%
filter(disease_tissue == T) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(rs12973192 = fct_relevel(rs12973192,
"C/C", "C/G", "G/G")) %>%
filter(!grepl("Cord",tissue_clean)) %>%
mutate(log10_stmn2 = log10(stmn_2_cryptic_psi_leaf)) %>%
mutate(log10_unc13a = log10(unc13a_cryptic_leaf_psi)) %>%
ggpubr::ggscatter(.,
x = "log10_stmn2",
y = "log10_unc13a",
color = 'rs12973192',
add = "reg.line", size = 3
) +
ylab("UNC13A CE PSI ") +
xlab("STMN2 Cryptic PSI ") +
stat_cor(aes( x = stmn_2_cryptic_psi_leaf,
y = unc13a_cryptic_leaf_psi,
color = rs12973192),
method = 'spearman',
cor.coef.name = 'rho',
show.legend = FALSE,
size = 14) +
scale_color_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
theme(legend.title = element_blank()) +
theme(text = element_text(size = 24),legend.text = element_text(size = 32)) +
ylab(expression(paste("Lo", g[10], "UNC13A CE PSI"))) +
xlab(expression(paste("Lo", g[10], "STMN2 CE PSI"))) +
ggtitle("CE SNP (rs12973192) genotype")
in_snp = clean_data_table %>%
filter(disease_tissue == T) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(rs12608932 = fct_relevel(rs12608932,
"A/A", "A/C", "C/C")) %>%
filter(!grepl("Cord",tissue_clean)) %>%
mutate(log10_stmn2 = log10(stmn_2_cryptic_psi_leaf)) %>%
mutate(log10_unc13a = log10(unc13a_cryptic_leaf_psi)) %>%
ggpubr::ggscatter(.,
x = "log10_stmn2",
y = "log10_unc13a",
color = 'rs12608932',
add = "reg.line", size = 3
) +
ylab("UNC13A CE PSI ") +
xlab("STMN2 Cryptic PSI ") +
stat_cor(aes( x = stmn_2_cryptic_psi_leaf,
y = unc13a_cryptic_leaf_psi,
color = rs12608932),
method = 'spearman',
cor.coef.name = 'rho',
show.legend = FALSE,
size = 14) +
scale_color_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
theme(legend.title = element_blank()) +
theme(text = element_text(size = 24),legend.text = element_text(size = 32)) +
ylab(expression(paste("Lo", g[10], "UNC13A CE PSI"))) +
xlab(expression(paste("Lo", g[10], "STMN2 CE PSI"))) +
ggtitle("intronic SNP (rs12608932) genotype")
ggpubr::ggarrange(ce_snp,in_snp)
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
detected_table_for_graph = clean_data_table %>%
filter(!(rs12973192 == "C/G" & rs12608932 == "C/C")) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,rs12608932,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(log2_unc = log2((unc13a_cryptic_leaf_psi)/(stmn_2_cryptic_psi_leaf))) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
mutate(no_cong = ifelse(rs12608932 == "C/C" & rs12973192 == "C/G",8,3)) %>% as.data.table()
#get out that person who is discongruous
comparisons = combn(unique(detected_table_for_graph$detection_name),2,simplify = F)
detected_table_for_graph %>%
ggplot(aes(y = log2_unc, x = detection_name,fill = detection_name)) +
geom_boxplot(show.legend = F) +
geom_jitter(show.legend = F, alpha = 0.8, height = 0,pch = 21) +
stat_compare_means() +
stat_compare_means(aes(group = detection_name),comparisons = comparisons,label = "p.signif") +
ylab("Ratio UNC13A CE PSI / STMN2 PSI") +
ylab(expression(paste("Lo", g[2], " fold UNC13A CE PSI / STMN2 CE PSI "))) +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
geom_hline(yintercept = 0,size = 1.5)
detected_table_for_graph = clean_data_table %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,rs12608932,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(log2_unc = log2((unc13a_cryptic_leaf_psi)/(stmn_2_cryptic_psi_leaf))) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
mutate(no_cong = ifelse(rs12608932 == "C/C" & rs12973192 == "C/G",8,3))
comparisons = combn(unique(detected_table_for_graph$detection_name),2,simplify = F)
detected_table_for_graph %>%
ggplot(aes(y = log2_unc, x = detection_name,fill = detection_name)) +
geom_boxplot(show.legend = F) +
geom_jitter(show.legend = F, alpha = 0.8, height = 0,pch = 21) +
stat_compare_means( label = "p.format") +
stat_compare_means(aes(group = detection_name),comparisons = comparisons,label = "p.signif") +
ylab("Ratio UNC13A CE PSI / STMN2 PSI") +
ylab(expression(paste("Lo", g[2], " fold UNC13A CE PSI / STMN2 CE PSI "))) +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
geom_hline(yintercept = 0,size = 1.5)
detected_table_for_graph_rs12608932 = clean_data_table %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12608932,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(log2_unc = log2((unc13a_cryptic_leaf_psi)/(stmn_2_cryptic_psi_leaf))) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
group_by(rs12608932) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12608932} \n ( {n_sample} )"))
comparisons = combn(unique(detected_table_for_graph_rs12608932$detection_name),2,simplify = F)
detected_table_for_graph_rs12608932 %>%
ggplot(aes(y = log2_unc, x = detection_name,fill = detection_name)) +
geom_boxplot(show.legend = F) +
geom_jitter(show.legend = F, alpha = 0.8, height = 0,pch = 21) +
stat_compare_means( label = "p.format") +
stat_compare_means(aes(group = detection_name),comparisons = comparisons,label = "p.signif") +
ylab("Ratio UNC13A CE PSI / STMN2 PSI") +
ylab(expression(paste("Lo", g[2], " fold UNC13A CE PSI / STMN2 CE PSI "))) +
xlab("UNC13A rs12608932 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
geom_hline(yintercept = 0,size = 1.5)
overall_fisher = clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(unc13a_detected = unc13a_cryptic_leaf_psi > 0) %>%
dplyr::select(participant_id,unc13a_detected,rs12973192) %>%
unique() %>%
group_by(rs12973192) %>%
mutate(n_sample = n_distinct(participant_id)) %>%
mutate(n_sample_detected = sum(unc13a_detected)) %>%
dplyr::select(rs12973192,n_sample,n_sample_detected) %>%
unique() %>%
mutate(n_non_detected = n_sample - n_sample_detected) %>%
dplyr::select(-n_sample)
over_p = overall_fisher %>%
column_to_rownames('rs12973192') %>%
chisq.test() %>%
broom::tidy() %>%
.$p.value
overall_fisher %>%
mutate(n_sample = n_non_detected + n_sample_detected) %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
ggplot(aes(x = detection_name, y = detection_rate, fill = detection_name)) +
geom_col(show.legend = F) +
ggpubr::theme_pubr() +
scale_y_continuous(labels = scales::percent) +
ylab("% of TDP-43 Proteionopathy Patients \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 18)) +
xlab("N individuals") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739"))
Although this difference is not significant, with the Fisher’s exact giving a p-value of 0.28.
Across our KD’s, we observed a clear allelic bias in the SK-DZ-N cells
kd_vafs = fread(file.path(here::here(),"data","all_kds_unc13.snp.out.tsv"))
kd_vafs %>%
left_join(rel_rna_cryptic_amount) %>%
filter(!is.na(condition)) %>%
mutate(VAF = alt_reads / total) %>%
filter(condition != "control" ) %>%
filter(alt_reads > 5) %>%
ggplot(aes(x = stmn_2_cryptic_psi,y = cryptic_psi, fill = -(VAF - 0.5),size = total)) +
geom_point(pch = 21) +
coord_fixed() +
ylim(0,0.8) +
xlim(0,0.8) +
geom_abline() +
scale_fill_gradient2()
Joining, by = "sample"
Also look at the coverage in the Liu nuclear facs sorted neurons
liu_vafs = fread(file.path(here::here(),"data","liu_facs_neurons_unc13.snp.out.tsv"))
liu_stmn2 = fread(file.path(here::here(),"data","liu_stmn2_and_unc13aaggregated.clean.annotated.bed"))
liu_stmn2[,sample := gsub(".SJ.out","",V4)]
liu_stmn2 = liu_stmn2 %>%
unique() %>%
pivot_wider(id_cols = 'sample',
names_from = "V7",
values_from = 'V5',
values_fill = 0)
liu_stmn2 <- liu_stmn2 %>%
mutate(stmn2_psi = STMN2_cryptic / (STMN2_cryptic + STMN2_annotated)) %>%
mutate(unc13a_psi = (UNC13A_3prime + UNC13A_5prime + UNC13A_5prime_2)/ (UNC13A_annotated + UNC13A_3prime + UNC13A_5prime + UNC13A_5prime_2))
liu_vafs %>%
mutate(VAF = alt_reads / total) %>%
data.table::melt(id.vars = c("sample",'VAF','total')) %>%
mutate(allele = ifelse(variable == "ref_reads", "C","G")) %>%
mutate(sample_name = sub("_TDP_.*", "", sample)) %>%
mutate(sample_name = gsub("_unsorted", "", sample_name)) %>%
mutate(condition = sub(".*_", "", sample)) %>%
ggplot(aes(x = condition,y = value, fill = allele)) +
geom_col(pch = 21, size = 4) +
facet_wrap(~sample_name,scales = "free")
liu_stmn2 %>%
dplyr::select(stmn2_psi,sample,unc13a_psi) %>%
data.table::melt() %>%
mutate(sample_name = sub("_TDP_.*", "", sample)) %>%
filter(!grepl("unsorted",sample_name)) %>%
mutate(condition = sub(".*_", "", sample)) %>%
mutate(condition = ifelse(grepl("negative",condition), "TDP-43 \nNegative", "TDP-43 \nPositive")) %>%
mutate(variable = ifelse(grepl("stmn2",variable), "STMN2", "UNC13A")) %>%
ggplot(aes(x = condition,y = value, fill = variable)) +
geom_col(size = 4,position = 'dodge') +
facet_wrap(~sample_name,scales = "free_x") +
scale_fill_manual(values = c("#004D40","#882255")) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 24)) +
xlab("") +
ylab("PSI") +
theme(legend.position = 'top') +
theme(legend.title=element_blank()) +
stat_compare_means(comparisons = list(c("STMN2","UNC13A")))
Using sample as id variables
liu_stmn2 %>%
dplyr::select(stmn2_psi,sample,unc13a_psi) %>%
data.table::melt() %>%
mutate(sample_name = sub("_TDP_.*", "", sample)) %>%
filter(!grepl("unsorted",sample_name)) %>%
mutate(condition = sub(".*_", "", sample)) %>%
mutate(condition = ifelse(grepl("negative",condition), "TDP-43 \nNegative", "TDP-43 \nPositive")) %>%
mutate(variable = ifelse(grepl("stmn2",variable), "STMN2", "UNC13A")) %>%
ggbarplot(,
x = "condition",
add = c("mean_se",'jitter'),
y = "value",
fill = 'variable',
color = 'variable',
position = position_dodge(0.8),dot.size = 6) +
ggpubr::theme_pubr() +
scale_color_manual(values = c("#000000","#000000")) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 24)) +
scale_y_continuous(labels = scales::percent) +
xlab("") +
ylab("PSI") +
theme(legend.position = 'top') +
theme(legend.title=element_blank()) +
stat_compare_means(comparisons = list(c("STMN2","UNC13A")))
Using sample as id variables
liu_stmn2 %>%
mutate(stmn2_psi = STMN2_cryptic / (STMN2_cryptic + STMN2_annotated)) %>%
mutate(unc13a_psi = (UNC13A_3prime + UNC13A_5prime)/ (UNC13A_annotated + UNC13A_3prime + UNC13A_5prime)) %>%
left_join(liu_vafs) %>%
mutate(alt_vaf = (alt_reads)/total) %>%
mutate(condition = ifelse(grepl("negative",sample),"negative", "positive")) %>%
filter(!grepl("unsorted",sample)) %>%
ggplot(aes(y = unc13a_psi,x = stmn2_psi, fill = alt_vaf)) +
geom_point(pch = 21,size = 5) +
ggtitle("STMN2 PSI and the VAF of the G allele \n in the Liu Nuclei ") +
facet_grid(~condition) +
scale_fill_viridis_c(option = "plasma") +
coord_fixed() +
geom_abline()
Joining, by = "sample"
nycg_vafs = fread(file.path(here::here(),"data","NYGC_all_samples_UNC13A_snp_coverage.tsv"),fill = T)
nycg_vafs %>%
left_join(clean_data_table) %>%
filter(rs12973192 == "C/G") %>%
mutate(fraction_risk = alt_reads / total) %>%
filter(!is.na(fraction_risk)) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = unc13a_cryptic_leaf_psi)) +
geom_point(size = 4, pch = 21, aes(fill = fraction_risk - 0.5)) +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A CE PSI") +
geom_abline() +
scale_fill_gradient2()
Joining, by = "sample"
nycg_vafs %>%
left_join(clean_data_table) %>%
filter(rs12973192 == "C/G") %>%
mutate(fraction_risk = alt_reads / total) %>%
filter(!is.na(fraction_risk)) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
mutate(stmn2_unc_fract = ((unc13a_cryptic_leaf_psi * 100) / (stmn_2_cryptic_psi_leaf * 100))) %>%
ggplot(aes(x = stmn2_unc_fract, y = fraction_risk)) +
geom_point(size = 4, pch = 21, aes(fill = fraction_risk - 0.5)) +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A CE PSI") +
scale_fill_gradient2()
Joining, by = "sample"
TODO: convert all libraries to p_load.
Perform quality control to check that reads are periodic.
read_length_filter <- 29 # length of reads of interest
gencode_v29_info <- read_tsv(paste0(here(), "/data/longest_proteincoding_transcript_hs_details.txt"))
ribo_df <- read_tsv(paste0(here(), "/data/riboseq/sh_tdpb.Aligned.toTranscriptome.out.bam.bed.gz"),
col_names = c("transcript_id", "start0", "end", "strand")) %>%
inner_join(gencode_v29_info, by = "transcript_id") %>%
dplyr::filter(end-start0 == read_length_filter, strand == "+") %>%
mutate(distance_from_start = start0-cds_start) %>%
group_by(distance_from_start) %>%
mutate(n_this_dist = n()) %>%
dplyr::select(distance_from_start, n_this_dist) %>%
unique()
ggplot(ribo_df, aes(x=distance_from_start, y=n_this_dist)) +
geom_line() +
geom_point() +
xlim(-50, 50) +
ylab("Counts") +
xlab("Distance of 5' end from annotated start codon") +
ggtitle(paste0("Periodicity of ", read_length_filter, "nt reads from TDP-43 KD replicate B"))
Use DESeq to detect differential ribosome footprints and create volcano plots
counts_df <- read_csv(paste0(here(), "/data/riboseq/cds_feature_counts.csv"))
counts <- as.matrix(counts_df %>% column_to_rownames("gene_id"))
ipsc_splicing <- read_csv(paste0(here(), "/data/ipsc_splicing_results.csv"))
ipsc_de = read_csv(paste0(here(), "/data/ipsc_differential_expression.csv"))
col_data <- data.frame(sample = colnames(counts)) %>%
mutate(condition = ifelse(str_detect(sample, "tdp"), "tdp_ko", "control"))
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = col_data,
design= ~condition)
dds <- DESeq(dds)
results_name <- resultsNames(dds)[2]
res <- results(dds, name=results_name)
res_df <- data.frame(res) %>%
rownames_to_column("gene_id") %>%
left_join(gencode_v29_info, by = "gene_id")
cryptic_df <- ipsc_splicing %>%
dplyr::select(gene_name, cryptic_junction) %>%
unique() %>%
filter(cryptic_junction)
res_df2 <- res_df %>%
mutate(cryptic_junction = gene_name %in% cryptic_df$gene_name) %>%
mutate(label_junction = case_when(gene_name %in% c("UNC13A","AGRN",
"UNC13B","PFKP","SETD5",
"ATG4B","STMN2","TARDBP") ~ gene_name)) %>%
mutate(colour = ifelse(cryptic_junction, "#fe6101", "#648FFF")) %>%
arrange(colour)
p1 <- ggplot(res_df2, aes(x = log2FoldChange, y = -log10(pvalue), label = label_junction)) +
geom_point(data = res_df2 %>% filter(!cryptic_junction),
aes(x = log2FoldChange, y= -log10(pvalue)), color = "#648FFF") +
geom_point(data = res_df2 %>% filter(cryptic_junction),
aes(x = log2FoldChange, y= -log10(pvalue)), color = "#fe6101") +
ggpubr::theme_pubr() +
ylab("-Log10(p-value)") +
xlab("Log2 fold change") +
geom_text_repel(box.padding = 1.2, max.overlaps = Inf)
p2 <- p1 + ylim(0,10)
p1
p2
Read in data from processed bam files
TODO: add scripts which deduplicate UMIs for each.
specific_rt <- read_csv(paste0(here(), "/data/targeted_RNA_seq/specific_RT_primer/for_plot.csv")) %>%
select(risk, sample_name, n)
random_hexamer <- read_csv(paste0(here(), "/data/targeted_RNA_seq/random_hexamer/values_for_plot.csv")) %>%
select(-sample_name) %>%
select(sample_name = actual_sample_name, risk, n)
tg_rnaseq_df <- bind_rows(specific_rt, random_hexamer) %>%
group_by(sample_name) %>%
mutate(n_healthy = ifelse(risk == "Non-Risk", n, 0)) %>%
mutate(combined_healthy = sum(n_healthy)) %>%
select(-n_healthy) %>%
mutate(combined = sum(n)) %>%
group_by(sample_name, risk) %>%
mutate(combined_n = sum(n)) %>%
ungroup() %>%
select(-n) %>%
unique() %>%
group_by(sample_name) %>%
mutate(p_value = pbinom(combined_healthy, sum(combined_n), 0.5)) %>% # single-tailed bionomial test
ungroup() %>%
mutate(sample_name_ordered = fct_reorder(sample_name, dplyr::desc(combined_n)))
ggplot(tg_rnaseq_df %>%
filter(combined_n > 0), aes(x = sample_name_ordered, y = combined_n, fill = risk)) +
geom_bar(stat="identity", position="dodge") +
geom_text(aes(x=sample_name, y=-2.5, label=signif(p_value,2)), size = 3.5) +
xlab("") +
ylab("Unique cDNAs") +
ggeasy::easy_add_legend_title("Allele") +
theme_minimal() +
ggpubr::theme_pubr() +
ggeasy::easy_rotate_x_labels()
Read in the processed data (following alignment with Bowtie2, and filtering for high-confidence alignments), and generate a plot of the overall distribution of crosslinks, and the rolling average difference between the 2x Risk and 2x Healthy.
Next, filter for peaks in the iCLIP signal, and see how they change in 2x Risk versus 2x Healthy. Highlight those which are close to the SNPs.
TODO: add script which does this filtering TODO: add fasta files for the minigenes
exon_snp <- 556
intron_snp <- 1106
intron_start <- 141
intron_end <- 1429
acceptor1 <- 394
acceptor2 <- 444
donor <- 572
mg_l <- 1624
cpoor_start <- 490
cpoor_end <- 1325
risk_colour <- "#00BFC4"
healthy_colour <- "#F8766D"
confidence_interval <- 0.75 # fracion, i.e. 0.75 = 75%.
peak_threshold <- 5 # how many times above the mean to be a "peak"
region_length <- 50
filtered_df <- read_csv(paste0(here(), "/data/iCLIP/bowtie2_aligned_2_filtered_df.csv"))
rolling_window <- 20
peak_t <- peak_threshold*1000/mg_l # by definition...
df2 <- filtered_df %>%
ungroup() %>%
group_by(condition, pos) %>%
mutate(mean_counts = sum(normalised_counts)/2) %>%
select(pos, condition, mean_counts) %>%
unique() %>%
pivot_wider(names_from = "condition", values_from ="mean_counts")
df2[is.na(df2)] <- 0
df2 <- df2 %>%
mutate(diff = R-H) %>%
mutate(mean_4 = (H+R)/2) %>%
arrange(pos)
top <- ggplot(df2, aes(x=pos, y=diff)) +
geom_area(aes(x=pos, y = mean_4), colour="grey50", fill = "grey50", alpha=0.2) +
geom_vline(xintercept = exon_snp, linetype="dashed") +
geom_vline(xintercept = intron_snp, linetype="dashed") +
xlim(1,1600) +
theme_minimal() +
ylab("XLs per 1000") +
xlab("") +
geom_vline(xintercept = cpoor_start) +
geom_vline(xintercept = cpoor_end)
bottom <- ggplot(df2, aes(x=pos, y=zoo::rollmean(diff, k = rolling_window, na.pad=T))) +
geom_area(colour = "grey50", fill="grey50", alpha = 1) +
geom_vline(xintercept = exon_snp, linetype="dashed") +
geom_vline(xintercept = intron_snp, linetype="dashed") +
xlim(1,1600) +
ylim(-0.5,0.75) +
theme_minimal() +
ylab("Change in XL density") +
xlab("Distance from start of minigene") +
geom_vline(xintercept = intron_start) +
geom_vline(xintercept = intron_end) +
geom_vline(xintercept = acceptor1) +
geom_vline(xintercept = acceptor2) +
geom_vline(xintercept = donor)
combined <- top/bottom
combined
find_ratio_bound <- function(e_x, sem_x, e_y, sem_y, quantile_value, randoms = 10000){
# monte carlo confidence interval estimate for a ratio. Assumes the two variables
# x and y are normally distributed. Finds confidence interval for (x-y)/y
# randomly generate possible global means for x and y
xs = rnorm(randoms, mean = e_x, sd=sem_x) # risk
ys = rnorm(randoms, mean = e_y, sd=sem_y) # healthy
ratios = (xs-ys)/ys
return(quantile(ratios, quantile_value))
}
peak_df <- filtered_df %>%
ungroup() %>%
group_by(pos) %>%
mutate(max_at_pos = max(normalised_counts)) %>% # at least 1 of the 4 samples above the threshold
ungroup() %>%
filter(max_at_pos >= peak_t) %>% # at least 1 of the 4 samples above the threshold
group_by(condition, pos) %>%
mutate(peak_mean = mean(normalised_counts),
peak_sd = sd(normalised_counts)) %>%
ungroup() %>%
select(condition, pos, peak_mean, peak_sd) %>%
unique() %>%
pivot_wider(names_from = "condition", values_from = c("peak_mean", "peak_sd")) %>%
mutate(frac_increase = (peak_mean_R-peak_mean_H)/peak_mean_H) %>%
mutate(peak_rank = rank(frac_increase)) %>%
mutate(colour = ifelse(abs(pos-exon_snp)<region_length, "gold",
ifelse(abs(pos-intron_snp)<region_length, "purple", "grey50")))
peak_df$upper_bound <- mapply(find_ratio_bound,
peak_df$peak_mean_R,
peak_df$peak_sd_R/sqrt(2), #convert to SEM
peak_df$peak_mean_H,
peak_df$peak_sd_H/sqrt(2), #convert to SEM
0.5*(1+confidence_interval))
peak_df$lower_bound <- mapply(find_ratio_bound,
peak_df$peak_mean_R,
peak_df$peak_sd_R/sqrt(2), #convert to SEM
peak_df$peak_mean_H,
peak_df$peak_sd_H/sqrt(2), #convert to SEM
0.5*(1-confidence_interval))
ggplot(peak_df, aes(x = peak_rank, y = 100*frac_increase, fill=colour)) +
geom_bar(stat="identity") +
scale_fill_identity() +
theme_minimal() +
ggpubr::theme_pubr()+
ylab("% change in 2x Risk") +
xlab("Rank") +
geom_errorbar(aes(ymin = 100*lower_bound, ymax = 100*upper_bound, x = peak_rank),
width = 0.3)
Supplementary iCLIP Figures
bars <- filtered_df %>%
ungroup() %>%
group_by(condition, pos) %>%
mutate(mean_normalised = mean(normalised_counts),
sd_normalised = sd(normalised_counts)) %>%
mutate(colour = ifelse(condition == "H", healthy_colour, risk_colour))
bar_top <- ggplot(bars %>% filter(abs(pos-exon_snp) < 35), aes(x = pos, y = mean_normalised,
fill = colour)) +
geom_bar(stat="identity", position="dodge") +
scale_fill_identity() +
geom_errorbar(aes(ymin = mean_normalised-sd_normalised, ymax=mean_normalised+sd_normalised),
position="dodge") +
geom_vline(xintercept = exon_snp, linetype="dashed") +
ylab("XLs per 1000") +
ggpubr::theme_pubr() +
ggtitle("iCLIP signal around CE SNP") +
xlab("")
bar_bottom <- ggplot(bars %>% filter(abs(pos-intron_snp) < 35), aes(x = pos, y = mean_normalised,
fill = colour)) +
geom_bar(stat="identity", position="dodge") +
scale_fill_identity() +
geom_errorbar(aes(ymin = mean_normalised-sd_normalised, ymax=mean_normalised+sd_normalised),
position="dodge") +
geom_vline(xintercept = intron_snp, linetype="dashed") +
ggpubr::theme_pubr() +
ylab("XLs per 1000") +
ggtitle("iCLIP signal around intronic SNP") +
xlab("")
bar_combined <- bar_top / bar_bottom
bar_combined
Analyse E-values (higher E value = stronger binding enrichment) for the motifs present at the exon SNP.
# Read in supplementary 4 from Ray et al 2013
data <- read_tsv(paste0(here(), "/data/heptamers/41586_2013_BFnature12311_MOESM334_ESM.txt"))
for(type in c("exon", "intron")){
if(type == "exon"){
snp <- "TAAAAGCATGGAT"
healthy <- "TAAAAGGATGGAT"
} else {
snp <- "GGATGGGTGGATA"
healthy <- "GGATGGTTGGATA"
}
all_healthy <- c()
all_risk <- c()
# Make dataframe containing E scores for each relevant 7mer
for(i in 1:(str_length(snp)-6)){
this_snp <- str_sub(snp, i, i+6)
this_healthy <- str_sub(healthy, i, i+6)
all_healthy <- c(all_healthy, str_replace_all(this_healthy, "T", "U"))
all_risk <- c(all_risk, str_replace_all(this_snp, "T", "U"))
row1 <- data %>% filter(`7mer` == str_replace_all(this_snp, "T", "U")) %>%
mutate(snp = T, pos = i)
row2 <- data %>% filter(`7mer` == str_replace_all(this_healthy, "T", "U")) %>%
mutate(snp = F, pos = i)
if(i == 1){
full <- bind_rows(row1, row2)
} else {
full <- bind_rows(full, row1, row2)
}
}
# Find average change in E score for the two groups of heptamers
full2 <- full %>%
group_by(pos) %>%
filter(n() == 2) %>%
ungroup() %>%
pivot_longer(cols = contains("set")) %>%
select(-`7mer`) %>%
pivot_wider(names_from = "snp", values_from = "value") %>%
mutate(abs_change = `TRUE`-`FALSE`) %>%
filter(!is.na(`TRUE`) & !is.na(`FALSE`)) %>%
group_by(name) %>%
mutate(mean_abs_change = mean(abs_change)) %>%
select(name, mean_abs_change) %>%
unique() %>%
arrange(mean_abs_change) %>%
mutate(is_tdp43 = ifelse(str_detect(name, "RNCMPT00076"), "red", "grey80")) %>%
mutate(short_name = str_sub(name, 1, 11)) %>%
ungroup() %>% group_by(short_name) %>%
mutate(average_delta = mean(mean_abs_change)) %>%
select(short_name, is_tdp43, average_delta) %>%
unique()
# Plot ranks
plot <- ggplot(full2, aes(x=rank(average_delta), y = average_delta,
fill = is_tdp43)) +
geom_bar(stat="identity") +
scale_fill_identity() +
theme_minimal() +
ggtitle(paste0("Average change in binding E value for each dataset with SNP for ", type, " SNP")) +
xlab("") +
ylab("Average change in E score") +
ggpubr::theme_pubr() +
xlab("Rank")
print(plot)
full_just_tdp <- full %>%
group_by(pos) %>%
filter(n() == 2) %>%
ungroup() %>%
select(`7mer`, contains("RNCMPT00076"), snp, pos) %>%
mutate(mean_E = (RNCMPT00076_e_setA + RNCMPT00076_e_setB)/2) %>%
pivot_longer(cols = contains("RNCMP")) %>%
mutate(ypos = ifelse(snp, -0.35, -0.27))
plot2 <- ggplot(full_just_tdp, aes(x = pos, y = value, colour = snp,
label = `7mer`)) +
geom_point(size = 3) +
xlab("7mer") +
ylab("E value (similar to rank?)") +
ggeasy::easy_add_legend_title("Contains Risk SNP") +
ggtitle(paste0("Binding E value for TDP43 for all 7x 7mers with or without risk SNP for ", type, " SNP")) +
geom_label(aes(y = ypos), size = 2.7) +
xlim(0.5, 7.5) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
ggeasy::easy_remove_x_axis()
print(plot2)
}
Read in values
df <- read_csv(paste0(here(),
"/data/itc/itc final values.csv")) %>%
group_by(RNA) %>%
mutate(sd =sd(kd)) %>%
mutate(mean_kd = mean(kd))
ggplot(df, aes(x = RNA, y = kd)) +
geom_bar(aes(x = RNA, y = mean_kd), stat="identity", position="dodge") +
geom_point() +
geom_errorbar(aes(x=RNA, ymin = mean_kd-sd, ymax = mean_kd+sd),
width = 0.3) +
ylim(0, NA) +
ggpubr::theme_pubr() +
ylab("Kd / nM") +
xlab("")
t.test(df$kd[which(df$RNA=="H")], df$kd[which(df$RNA=="R")])
Welch Two Sample t-test
data: df$kd[which(df$RNA == "H")] and df$kd[which(df$RNA == "R")]
t = -3.172, df = 5.2878, p-value = 0.02291
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-53.065478 -5.984522
sample estimates:
mean of x mean of y
48.325 77.850